Completed DataCamp assessments of R programming.
Finished GitHub tutorial
on version control and collaboration.
Read Analyzing US
Census Data by Kyle Walker.
Did exploratory analysis and visualization on M&M Candy data
set with newly gained skills.
Practiced tidycensus as instructed in
workshops,
and did an exercise to find the county in Iowa with the lowest median
household income based on the 2020 (2016-2020 5-year) American Community Survey.
var <- load_variables(2020, "acs5", cache = TRUE)
# View(var)
# Median household income (table B19013) for every Iowa county, ACS year 2020.
ia <- get_acs(
  geography = "county",
  variables = c(medincome = "B19013_001"),
  state = "IA",
  year = 2020
)
# The five counties with the lowest estimates, lowest first.
iaRearranged <- ia %>%
  arrange(estimate) %>%
  head(5)
# Report the county with the lowest estimate. The earlier version took max()
# over these five rows, which picks the fifth-lowest county, not the lowest.
lowestEstimate <- min(iaRearranged$estimate, na.rm = TRUE)
iaRearranged$NAME[which(iaRearranged$estimate == lowestEstimate)]
# NOTE(review): the output below was recorded from the earlier max()-based
# lookup; re-run this chunk to confirm the county name.
## [1] "Decatur County, Iowa"
Basic mapping with ggplot2 and ggmap.
Visualizing Geospatial Data in R.
get_decennial(), get_acs(), tigris: counties(cb = TRUE) and
plot(add = TRUE).census_api_key().Sys.getenv().
# Income measures requested for both states; the names become the values of
# the `variable` column that the barplot facets on.
incomeVars <- c(
  medHousehold = "B19013_001",
  medFamily = "B19113_001",
  Capita = "B19301_001"
)
# The year is pinned so the data keeps matching the "2016-2020" / "2020
# Inflation-adjusted Dollars" plot labels; get_acs() otherwise uses its
# package default year.
ilIncome <- get_acs(
  geography = "state",
  variables = incomeVars,
  state = "IL",
  year = 2020
)
## Getting data from the 2016-2020 5-year ACS
sdIncome <- get_acs(
  geography = "state",
  variables = incomeVars,
  state = "SD",
  year = 2020
)
## Getting data from the 2016-2020 5-year ACS
# Both tables have identical columns and no shared rows, so stack them
# directly instead of joining on every column.
Income <- bind_rows(ilIncome, sdIncome)
# Barplot for incomes in Illinois and South Dakota
# One facet per income measure and one bar per state; each bar carries its
# ACS margin of error and the estimate printed above it.
ggplot(Income, aes(x = NAME, y = estimate)) +
  geom_col() +
  facet_grid(~ variable) +
  labs(
    title = "Median Income in the Past 12 Months in 2020 Inflation-adjusted Dollars",
    subtitle = "2016-2020 American Community Survey",
    x = "State",
    y = "ACS Estimated Income"
  ) +
  scale_y_continuous(labels = scales::dollar_format(), limits = c(0, 90000)) +
  # The constant colour string stays inside aes() on purpose: mapping it is
  # what creates the "Margin of error" legend entry. Fixed settings such as
  # width and vjust are layer parameters, not aesthetics.
  geom_errorbar(
    aes(
      ymin = estimate - moe,
      ymax = estimate + moe,
      color = "Margin of error"
    ),
    width = 0.5
  ) +
  geom_text(aes(label = sprintf("$%0.0f", estimate)), vjust = -1)
# Occupation-group totals from ACS table C24050; the names become the values
# of the `occupation` column used by the plots.
occVars <- c(
  Management_business_science_arts = "C24050_015",
  Service = "C24050_029",
  Sales_office = "C24050_043",
  NaturalResources_construction_maintenance = "C24050_057",
  ProductionTransportation_materialMoving = "C24050_071"
)

# Fetch the occupation totals for one state and add `percent`: each group's
# share of the sum of the requested groups, rounded to two decimals.
# The year is pinned so the data keeps matching the plot subtitles.
getStateOccupations <- function(state) {
  get_acs(
    geography = "state",
    variables = occVars,
    state = state,
    year = 2020
  ) %>%
    rename(occupation = variable) %>%
    mutate(percent = round(estimate / sum(estimate) * 100, 2))
}

ilOcc <- getStateOccupations("IL")
## Getting data from the 2016-2020 5-year ACS
sdOcc <- getStateOccupations("SD")
## Getting data from the 2016-2020 5-year ACS
# Both tables have identical columns and no shared rows, so stack them
# directly instead of joining on every column.
Occupation <- bind_rows(ilOcc, sdOcc)
# Barplot for occupations in Illinois and South Dakota
# One facet per occupation group and one bar per state; each bar carries its
# ACS margin of error and the estimate printed above it.
ggplot(Occupation, aes(x = NAME, y = estimate)) +
  geom_col() +
  facet_grid(~ occupation) +
  labs(
    title = "Industry by Occupation for the Civilian Employed Population 16 Years and Over",
    # The data comes from the 2016-2020 5-year ACS (see the get_acs() output).
    subtitle = "2016-2020 American Community Survey",
    x = "State",
    y = "ACS Estimated Population"
  ) +
  # The constant colour string stays inside aes() on purpose: mapping it is
  # what creates the "Margin of error" legend entry. Fixed settings such as
  # width and vjust are layer parameters, not aesthetics.
  geom_errorbar(
    aes(
      ymin = estimate - moe,
      ymax = estimate + moe,
      color = "Margin of error"
    ),
    width = 0.5
  ) +
  geom_text(aes(label = estimate), vjust = -1)
# Pie chart for occupations in Illinois
# A single stacked bar wrapped onto polar coordinates; each slice is labelled
# with the group's percent share at the middle of the slice.
ggplot(ilOcc, aes(x = "", y = estimate, fill = occupation)) +
  geom_col() +
  coord_polar("y", start = 0) +
  labs(
    title = "Industry by Occupation in Illinois",
    # The data comes from the 2016-2020 5-year ACS (see the get_acs() output).
    subtitle = "2016-2020 American Community Survey"
  ) +
  geom_text(
    aes(label = paste0(percent, "%")),
    position = position_stack(vjust = 0.5)
  ) +
  theme_void()
# Pie chart for occupations in South Dakota
# A single stacked bar wrapped onto polar coordinates; each slice is labelled
# with the group's percent share at the middle of the slice.
ggplot(sdOcc, aes(x = "", y = estimate, fill = occupation)) +
  geom_col() +
  coord_polar("y", start = 0) +
  labs(
    title = "Industry by Occupation in South Dakota",
    # The data comes from the 2016-2020 5-year ACS (see the get_acs() output).
    subtitle = "2016-2020 American Community Survey"
  ) +
  geom_text(
    aes(label = paste0(percent, "%")),
    position = position_stack(vjust = 0.5)
  ) +
  theme_void()
x@slot_name or slot(x, "slot_name") accesses a slot of an S4 object; use
list subsetting ($ or [[...]]) to extract
an element in a slot.
$ and [[ subsetting on a
Spatial___DataFrame pulls columns directly from the data
frame. That is, if x is a Spatial___DataFrame
object, then either x$col_name or
x[["col_name"]] pulls out the col_name column
from the data frame.
Rows can be subset with a logical vector:
in_asia <- countries_spdf$region == "Asia"
countries_spdf[in_asia, ]
ggplot2 expects data in data frames, tmap
expects data in spatial objects.
To change the projection of a ggplot2 plot: use the
coord_map() function.
In tmap, tm_shape() takes an argument
projection that allows you to swap projections for the plot.
tmap_save()
saves a tmap plot to a file. The extension of the file name specifies
the file type, for example .png or .pdf for
static plots. .html can save an interactive version which
leverages the leaflet package.
get_estimates()
gets data from the US Census Bureau Population Estimates APIs.